import numpy as np
import pandas as pd
import scipy.stats as stats
from .table import simple_table
import abc

"""
Environment during development:
        numpy :         1.18.5
        scipy :         1.5.0
        Date  :         2021/12/7
"""

class model(metaclass=abc.ABCMeta):
    """Abstract interface every concrete statistical model must implement.

    Subclasses provide parameter estimation (`fit`), prediction
    (`predict`) and a human-readable report (`summary`).
    """

    @abc.abstractmethod
    def fit(self):
        """Estimate model parameters from data."""

    @abc.abstractmethod
    def predict(self):
        """Produce predictions from a fitted model."""

    @abc.abstractmethod
    def summary(self):
        """Print a report describing the fitted model."""


class Linear_Regression(model):
    """
    Ordinary least squares (OLS) linear regression via the normal equations,
    with a Stata-style textual summary.

    Parameters
    ----------
    with_constant : bool, default True
        If True, a column of ones is appended to the design matrix so an
        intercept is estimated; it is reported last as ``intercept``.

    Attributes set by ``fit``
    -------------------------
    coef : (p, 1) ndarray
        Estimated coefficients (intercept last when ``with_constant``).
    residuals : (n, 1) ndarray
        ``y - X @ coef``.
    sse : float
        Residual sum of squares.
    ssr : float
        Regression (explained) sum of squares, ``SST - SSE``.
    R_2 : float
        Coefficient of determination, ``SSR / SST``.
    std_e_beta : (p, 1) ndarray
        Standard errors of the coefficients.
    t : (p, 1) ndarray
        t statistics, ``coef / std_e_beta``.
    f : float
        F statistic of overall significance.
    confint : list[list[float]]
        95% confidence interval ``[lwr, upr]`` per coefficient.

    Demo code
    ---------
    >>> from sklearn.datasets import load_diabetes
    >>> data_diabetes = load_diabetes()
    >>> reg = Linear_Regression()
    >>> reg.fit(data_diabetes['data'], data_diabetes['target'])
    >>> reg.summary()          # prints the OLS summary table
    """

    def __init__(self, with_constant=True):
        self.with_constant = with_constant

    def fit(self, X, y):
        """Estimate coefficients by OLS and compute inference statistics.

        Parameters
        ----------
        X : array-like or pd.DataFrame, shape (n, k)
            Design matrix WITHOUT a constant column.
        y : array-like, shape (n,)
            Response vector.
        """
        self.example_num, self.variable_num = X.shape

        # Capture column names BEFORE X is replaced by a plain ndarray:
        # the original checked the type after augmentation, so DataFrame
        # names were silently discarded when with_constant=True.
        if isinstance(X, pd.DataFrame):
            self.variable_names = list(X.columns.values)
        else:
            self.variable_names = ['x' + str(i + 1)
                                   for i in range(self.variable_num)]

        X = np.asarray(X, dtype=float)
        if self.with_constant:
            # Intercept column goes last, so coef[-1] is the intercept.
            X = np.hstack([X, np.ones((self.example_num, 1))])
        self.X = X
        self.y = np.asarray(y)

        # p = number of estimated parameters; residual degrees of freedom
        # n - p (equals n - k - 1 in the with-constant case).
        self._n_params = self.variable_num + (1 if self.with_constant else 0)
        self._df_resid = self.example_num - self._n_params

        y_col = self.y.reshape(-1, 1)

        # Normal equations (X'X) beta = X'y; solve() avoids forming the
        # explicit inverse and is numerically more stable.
        self.coef = np.linalg.solve(X.T @ X, X.T @ y_col)

        self.residuals = self.compute_residuals(X, y_col)
        # ANOVA decomposition: SST = SSR + SSE.  (The original stored SST
        # in `ssr` and computed R² = SST/(SSE+SST), which overstated it.)
        self.sse = float(np.sum(self.residuals ** 2))
        sst = float(np.sum((self.y - np.mean(self.y)) ** 2))
        self.ssr = sst - self.sse
        self.R_2 = self.ssr / sst
        self.residuals_var = self.sse / self._df_resid
        # Var(beta_hat) = sigma² (X'X)^{-1}; take sqrt of the diagonal.
        self.std_e_beta = np.sqrt(
            self.residuals_var * np.diagonal(np.linalg.inv(X.T @ X))
        ).reshape(self._n_params, 1)
        self.t = self.coef / self.std_e_beta
        # F = (SSR/k) / (SSE/df_resid), overall-significance test.
        self.f = (self.ssr / self.variable_num) / (self.sse / self._df_resid)
        self._compute_beta_confint()

    def predict(self, x):
        """Return predictions ``x @ coef``.

        If the model was fitted with a constant and `x` has only the k
        original columns, the constant column is appended automatically.
        """
        x = np.asarray(x)
        if (self.with_constant and x.ndim == 2
                and x.shape[1] == self.variable_num):
            x = np.hstack([x, np.ones((x.shape[0], 1))])
        return np.asarray(x @ self.coef)

    def compute_residuals(self, x, y):
        """Return ``y - predict(x)``."""
        return y - self.predict(x)

    def _compute_beta_confint(self):
        """95% CI per coefficient: coef ± t_{0.975, df} * std_err.

        The original divided the standard error by sqrt(n) (already part
        of std_err) and used the one-sided 5% quantile; both are fixed.
        """
        t_crit = stats.t.ppf(0.975, df=self._df_resid)
        self.confint = [
            [float(c) - t_crit * float(se), float(c) + t_crit * float(se)]
            for c, se in zip(self.coef, self.std_e_beta)
        ]

    def _adj_R_square(self):
        """Adjusted R²: 1 - (1 - R²) * (n - 1) / (n - p)."""
        return 1 - (1 - self.R_2) * (self.example_num - 1) / self._df_resid

    def summary(self):
        """Print the OLS summary table (requires a prior ``fit``)."""
        table = simple_table(title='OLS Summary Table')
        table.add_line(style='double')
        table.add_row(['R-Square = ', np.round(self.R_2, 4),
                       'Number of obs = ', self.example_num])
        table.add_row(['SSR = ', np.round(self.ssr, 4),
                       'df = ', self.variable_num])
        table.add_row(['SSE = ', np.round(self.sse, 4),
                       'df = ', self._df_resid])
        table.add_row(['SST = ', np.round(self.sse + self.ssr, 4),
                       'df = ', self.example_num - 1])
        # Survival function sf() is more accurate than 1 - cdf() in the tail.
        p_value_f = np.round(
            stats.f.sf(self.f, dfn=self.variable_num, dfd=self._df_resid), 4)
        table.add_row(['F statistic = ', np.round(self.f, 4),
                       'Prob > F', p_value_f])
        table.add_line(style='single')
        table.add_row(['variable', 'coef', 't', 'P > |t|',
                       ' Std.Err', '95% Conf.Interval'])
        table.add_line(style='single')

        for i in range(self._n_params):
            intercept_row = self.with_constant and i == self._n_params - 1
            name = 'intercept' if intercept_row else self.variable_names[i]
            t_i = np.round(float(self.t[i]), 4)
            # Two-sided p-value; the original used one-sided 1 - cdf(t),
            # which reported p ≈ 1 for significant negative coefficients.
            p_i = np.round(2 * stats.t.sf(abs(t_i), df=self._df_resid), 4)
            table.add_row([name, np.round(float(self.coef[i]), 4), t_i, p_i,
                           np.round(float(self.std_e_beta[i]), 4),
                           np.round(self.confint[i], 4)])
        table.add_line(style='double')
        print(table)


if __name__ == "__main__":
    # Smoke test: fit on the diabetes data and compare coefficients with
    # scikit-learn's reference implementation.
    from sklearn.datasets import load_diabetes
    from sklearn.linear_model import LinearRegression

    data_diabetes = load_diabetes()
    X, y = data_diabetes['data'], data_diabetes['target']

    # Named `ols` (not `model`) so the abstract base class defined above
    # is not shadowed at module level.
    ols = Linear_Regression()
    ols.fit(X, y)
    print(X.shape)
    ols.summary()

    reference = LinearRegression()
    reference.fit(X, y)
    print(reference.coef_)
    print(reference.intercept_)